Note. Boxplots display the interquartile range (IQR, center box), and the whiskers extend 1.5*IQR from the lower and upper hinge. The white point indicates the mean and the white center line indicates the median.


Data Preparation

In an initial preparatory step, we import the data into the R project environment and prepare the variables for further processing and later analyses.

Data Import

The data were collected using two different survey tools. For the study with sojourners (Study 1: worker) we used the survey platform Qualtrics XM, whereas the studies with international students (Study 2: student), and the international medical professionals (Study 3: medical) were conducted using the survey framework FormR. This means that the datasets had inconsistent file formats and naming conventions. For the Qualtrics study we pre-processed some variables to ease the import process (for the syntax files see the SPS files in ‘data/S1_Workers/processed/cleaned’ and for the raw data files see ‘data/S1_Workers/raw’). For the two other studies, we import the raw csv files from their respective folders.

# workers
# initial data cleaning was done in SPSS; the text above locates the SPS
# syntax files in "data/S1_Workers/processed/cleaned"
dtWorker <- lapply(
  c(
    raw.pre = "data/S1_Workers/processed/cleaned/MT - Pre-Measure - 06-15-2018.sav",
    raw.post = "data/S1_Workers/processed/cleaned/MT - Post-Measure - 06-15-2018.sav",
    raw.morning = "data/S1_Workers/processed/cleaned/MT - Morning - 06-15-2018.sav",
    raw.afternoon = "data/S1_Workers/processed/cleaned/MT - Afternoon - 06-15-2018.sav"
  ),
  read_spss
)

# students (raw csv exports; header row and comma separator stated explicitly)
dtStudents <- lapply(
  c(
    raw.pre = "data/S2_Students/raw/AOTS_Pre.csv",
    raw.post = "data/S2_Students/raw/AOTS_Post.csv",
    raw.daily = "data/S2_Students/raw/AOTS_Daily.csv"
  ),
  read.csv,
  header = TRUE,
  sep = ","
)

# young medical professionals (raw csv exports, read.csv defaults)
dtMedical <- lapply(
  c(
    raw.eligibility = "data/S3_Medical/raw/AOTM_Eligibility.csv",
    raw.pre = "data/S3_Medical/raw/AOTM_Pre.csv",
    raw.post = "data/S3_Medical/raw/AOTM_Post.csv",
    raw.daily = "data/S3_Medical/raw/AOTM_Daily.csv"
  ),
  read.csv
)

Data Cleaning & Data Exclusions

Worker

For the sojourner sample, data were collected in four separate surveys: (1) the pre-measurement, (2) the daily morning survey, (3) the daily afternoon survey, and (4) a post-measurement. We combine the four individual surveys into one cohesive dataframe and drop variables that are not relevant to the present analyses. We then format the time and date variables and add person and measurement indices (for easy and meaningful addressing of the data). We also exclude our own test data.
Note: All data preparation steps are saved in the ‘dtWorker’ list.

# Column names kept from the morning and afternoon surveys.
# Numbered item batteries are generated with paste0() instead of being typed
# out one by one; the resulting vector is the same and in the same order.
names.m <- c(
  # survey meta data and first interaction items
  "StartDate", "EndDate", "Finished", "Duration__in_seconds_", "RecordedDate",
  "ExternalReference", "Meta_Operating_System", "Contact_dum", "number",
  "time", "duration_1", "dyad.group", "gr_size",
  # gr_* items
  paste0("gr_type_", 1:16), "gr_type_17_TEXT",
  paste0("gr_context_", 1:12), "gr_context_13_TEXT", "gr_context_14_TEXT",
  "gr_dutchness",
  # dyad_type_* and Context_* items
  paste0("dyad_type_", 1:16), "dyad_type_17_TEXT",
  paste0("Context_", 1:12), "Context_13_TEXT", "Context_14_TEXT",
  # key motive, need and quality items
  "keyMotive", "keymotive_fulfillemt_1", "keyMotive_Dutch_1",
  "autonomy_1", "competence_1", "relatedness_self_1", "relatedness_other_1",
  "qualityAccidental_1", "qualityVoluntary_1", "qualityCooperative_1",
  "qualityDutchy_1", "quality_overall_1", "quality_meaning_1",
  "quality_star_1", "wantInt",
  # desire_* items
  paste0("desire_type_", 1:16), "desire_type_17_TEXT",
  paste0("desire_context_", 1:12),
  "desire_context_13_TEXT", "desire_context_14_TEXT",
  # *_noInt / *_NoInt items, thermometers, well-being and mood items
  "Reason_nodesire", "keyMotive_noInt", "keyMotive_noInt_fulf_1",
  "autonomy_NoInt_1", "competence_NoInt_1", "relatedness_1_NoInt_1",
  "thermometerDutch_1", "thermometerDutchInt_2", "ExWB_1",
  "alertness1", "calmness1", "valence1",
  "alertness2", "calmness2", "valence2",
  # non-Dutch contact items
  "inNonDutch", "NonDutchNum",
  paste0("NonDutchType_", 1:14), "NonDutchType_15_TEXT",
  # date, time and location
  "date", "time.0", "LocationLatitude", "LocationLongitude"
)

# the afternoon survey keeps two additional key-interaction columns
names.a <- c(names.m, "keyInteraction_1", "keyInteractionTime")

# Create reduced data sets for morning and afternoon and tag every row with
# the survey it came from
dat.mo <- dtWorker$raw.morning[, names.m]
dat.mo$daytime <- "morning"

dat.af <- dtWorker$raw.afternoon[, names.a]
dat.af$daytime <- "afternoon"

# merge morning and afternoon measurements with indicator [+ clean up]
# (rbind.fill fills the two afternoon-only columns with NA for morning rows)
daily.dat <- rbind.fill(dat.mo, dat.af)

# Drop reference 55951 (presumably the authors' own test entry -- confirm).
# The mask is built so that a missing reference is simply kept: a bare
# `ExternalReference != 55951` evaluates to NA for such rows and `[` would
# then replace them with all-NA rows.
isExcluded <- !is.na(daily.dat$ExternalReference) &
  daily.dat$ExternalReference == 55951
daily.dat <- daily.dat[!isExcluded, ]

dtWorker$daily <- daily.dat
rm(dat.mo, dat.af, names.m, names.a, daily.dat, isExcluded)


# Column names kept from the pre-measurement (same names, same order; related
# items are grouped per line)
names.pre <- c(
  "Finished", "age", "Gender", "Living",
  paste0("roommate_", 1:3),
  "nationality", "SecondNationality", "timeNL_1",
  "Reason_2", "Reason_5", "Reason_7", "Reason_8_TEXT",
  "DutchLang",
  paste0("occupation_", c(1:4, 7)),
  "CurrentEducation_1", "education_level", "EduLang_2", "RUG_faculty",
  "Study.0", "association", "DutchMeetNum", "DutchFriends_1",
  "assimilation", "separation", "integration", "marginalization",
  "VIA_heritage", "VIA_Dutch",
  "SSAS_surrounding", "SSAS_privat", "SSAS_public",
  "autonomy", "relatedness", "competence",
  "anxiety", "swl",
  "alertness", "calmness", "valence",
  "date", "time", "City", "ZIP",
  "id" # key used to join with the daily data
)

# reduced data set for pre measurement
dat.pre.red <- dtWorker$raw.pre[, names.pre]

# merge with daily data [+ clean up]
# Full outer join on the participant reference. Columns that exist in both
# tables get their suffix directly from `suffixes` (".daily" / ".pre").
# This replaces the former post-hoc gsub("[[:punct:]]x", ...) renaming, whose
# unanchored pattern would also rewrite any other column name that happens to
# contain punctuation followed by "x" or "y".
df.pre <- merge(
  x = dtWorker$daily,
  y = dat.pre.red,
  by.x = "ExternalReference",
  by.y = "id",
  all = TRUE,
  suffixes = c(".daily", ".pre")
)
rm(names.pre)

# names for post measurement
names.post <- c(
  "ExternalReference", # join key
  "assimilation", "separation", "integration", "marginalization",
  "VIA_heritage", "VIA_Dutch",
  "anxiety", "swl",
  "rosenberg", "social_support", "stress",
  "discrimination", "discrimination_month",
  "NLE_1month", "NLE_6month", "NLE_12month"
)

# reduced data set for post-measurement
dat.post.red <- dtWorker$raw.post[, names.post]

# merge post measurement with pre- and daily data
# Full outer join; columns measured at both pre and post get ".pre" / ".post"
# via `suffixes`, instead of renaming ".x" / ".y" afterwards with an
# unanchored regular expression that could hit unrelated column names.
df <- merge(
  x = df.pre,
  y = dat.post.red,
  by = "ExternalReference",
  all = TRUE,
  suffixes = c(".pre", ".post")
)

# add to list
dtWorker$combined <- df

# create data frame with cleaned data: keep rows with a finished
# pre-measurement, a finished daily survey and a known participant reference
df <- filter(
  dtWorker$combined,
  Finished.pre == 1,
  Finished.daily == 1,
  !is.na(ExternalReference)
)

# add running number as measurement ID within participants
df$measureID <- rowidv(df, cols = "ExternalReference")

df <- df %>%
  # participant and time indices
  mutate(
    # participant ID
    PID = as.numeric(factor(ExternalReference)),
    # time ID with t0 = 0 for meaningful intercept interpretations
    TID = measureID - 1,
    # date and time are cut out of StartDate by character position
    date = substr(StartDate, 1, 10),
    time = substr(StartDate, 12, 19),
    # all days as numeric for ordering
    daynum = as.numeric(factor(date))
  ) %>%
  # day / session identifiers
  mutate(
    # correctly identify which date the questionnaire is about: morning
    # surveys started before 12:00 and afternoon surveys started before 19:00
    # are attributed to the previous day
    daycor = ifelse(
      (daytime == "morning" &
        period_to_seconds(hms(time)) < period_to_seconds(hms("12:00:00"))) |
        (daytime == "afternoon" &
          period_to_seconds(hms(time)) < period_to_seconds(hms("19:00:00"))),
      daynum - 1,
      daynum
    ),
    daycor.lead = sprintf("%02d", daycor),
    # morning / afternoon to a / b
    daytime.lt = ifelse(daytime == "morning", "a", "b"),
    # combine day id with morning / afternoon
    day_time = paste(daycor.lead, daytime.lt, sep = "_"),
    # day and time identifier as numeric id
    session = as.numeric(factor(day_time))
  ) %>%
  # remaining type conversions and missing-value defaults
  mutate(
    SubTime = chron::times(time.0),
    time.daily = as.character(time.daily),
    PPDate = as.Date(date.daily),
    # missing contact counts are treated as zero
    number = replace_na(number, 0),
    NonDutchNum = replace_na(NonDutchNum, 0)
  )

dtWorker$clean <- df

# clean up
rm(df.pre, names.post, dat.post.red, dat.pre.red, df)

# Export reduced Data
# write.csv(dtWorker$clean, "data/processed/MT_clean-merged_07-05-2018.csv", row.names = F)
# save(dtWorker$clean, file = "data/processed/MT_clean-merged_07-05-2018.RData")

Student

For the student sample, data were similarly collected in three separate surveys: (1) the pre-measurement, (2) the daily survey sent out at lunch and dinner time, and (3) a post-measurement. We combine the three individual surveys into one large dataframe and drop variables that are not relevant to the present analyses. We exclude our own test data as well as one participant who entered the study twice (but gave different responses during the pre-measurement). We also reformat missing values and format core ID variables.
Note: All data preparation steps are saved in the ‘dtStudents’ list.

# our own test IDs
ownIDs <- c(
  "beautifulLionfishXXXR5rcgVBzGu8hPvOqrK8UBJBw4owvi9nfRFSFu3lMzYhE",
  "niceDogoXXXmB8JI5SFu78SF3DVof84mGUPPNUr14p2HYFTtp31a6D1OwAzM6F-K",
  "amusedQuailXXXmhuc_fpTp8vPkMwDH1BzjaH1d1kHSO1bsPEfsnaEYk4WeVBfPi",
  "juwGAbtXX0_1kmZtSVqKh3PGaHOICqUyU4iBkrT3nDsI_uifuD1gzKcZerxaM5FL"
)

# Turn empty strings and literal "NA" entries of text (character or factor)
# columns into real missing values; all other column types pass through
# untouched. Replaces `mutate_all(na_if, ...)`, which is superseded and, with
# the stricter type checks of newer dplyr versions, errors on numeric columns.
blankToNA <- function(x) {
  if (is.character(x) || is.factor(x)) {
    x[x %in% c("", "NA")] <- NA
  }
  x
}

# Prepare dfs for Cleaning
df.pre <- dtStudents$raw.pre %>%
  mutate(across(everything(), blankToNA)) %>%
  filter(!is.na(ended)) %>% # remove all who did not finish
  # remove all who did the pre questionnaire multiple times (b/c inconsistent
  # ratings scales).
  # NOTE(review): rows with a missing e_mail count as duplicates of each other
  # here and are all dropped -- confirm this is intended.
  filter(!e_mail %in% .$e_mail[duplicated(.$e_mail)]) %>%
  filter(!session %in% ownIDs) %>% # remove our own test
  mutate(session = as.character(session)) # factor -> character (precaution)

df.post <- dtStudents$raw.post %>%
  mutate(across(everything(), blankToNA)) %>%
  filter(!is.na(session)) %>% # remove own test runs
  filter(!session %in% ownIDs) %>% # remove our own test
  filter(session %in% df.pre$session) %>% # remove anyone who wasn't in the pre
  filter(!is.na(ended)) %>% # remove all who never finished
  filter(!session %in% .$session[duplicated(.$session)]) %>% # remove all duplicate sessions
  mutate(session = as.character(session)) # factor -> character (precaution)

df.daily <- dtStudents$raw.daily %>%
  mutate(across(everything(), blankToNA)) %>%
  filter(!session %in% ownIDs) %>% # remove our own test
  filter(session %in% df.pre$session) %>% # remove anyone who wasn't in the pre
  filter(!is.na(ended)) %>% # remove all who never finished
  mutate(session = as.character(session)) # factor -> character (precaution)

# merge daily with pre (inner join: only sessions present in both)
dfPreDaily <- merge(
  x = df.daily,
  y = df.pre,
  by = "session",
  suffixes = c(".daily", ".pre"),
  all = FALSE
)

# merge daily + pre with post (inner join: only sessions present in both)
dfCombined <- merge(
  x = dfPreDaily,
  y = df.post,
  by = "session",
  suffixes = c(".pre", ".post"),
  all = FALSE
)

# add to list
dtStudents$clean <- dfCombined

# clean up workspace
rm(df.pre, df.daily, df.post, dfPreDaily, dfCombined, ownIDs, blankToNA)

Medical

For the medical professionals sample data was, again, collected in three separate surveys: (1) the pre-measurement, (2) the daily survey sent out at lunch and dinner time, and (3) a post-measurement. We combine the three individual surveys into one large dataframe. We exclude our own test data. We also reformat missing values and format core ID variables.
Note: All data preparation steps are saved in the ‘dtMedical’ list.

# our own test IDs
ownIDs <- c(
  "test_LeonieXXXSklxecPLW0-FBPM4796o3pUwUhAY5jb9KGw8jQsKxWmGpa1Jiy",
  "test_MaxXXXtOp_5dTNefIq0yKXtXt2IN6eEKxeHoPY9mlyvdsqPpLp1B0NGg4UL",
  "test_JannisXXXBsNqk62fOpX6chbd2tMWPptUdjjnhAqnQ3uBqckZ7gLIEoPlfZ",
  "quaintLeopardCatXXXAJ9cfSj-_SZLwNwMDxv_xv_iyr1Bg5YFLTlYdrjW0UXZY",
  "blue-eyedIndianElephantXXXLf5zPMpQCDGS3umFzIj-YVky7ivTItvvozW49m"
)

# Turn empty strings and literal "NA" entries of text (character or factor)
# columns into real missing values; all other column types pass through
# untouched. Replaces `mutate_all(na_if, ...)`, which is superseded and, with
# the stricter type checks of newer dplyr versions, errors on numeric columns.
blankToNA <- function(x) {
  if (is.character(x) || is.factor(x)) {
    x[x %in% c("", "NA")] <- NA
  }
  x
}

# Prepare dfs for Cleaning
df.pre <- dtMedical$raw.pre %>%
  mutate(across(everything(), blankToNA)) %>%
  filter(!is.na(ended)) %>% # remove all who did not finish
  filter(!session %in% ownIDs) %>% # remove our own test
  mutate(session = as.character(session)) # factor -> character (precaution)

df.post <- dtMedical$raw.post %>%
  mutate(across(everything(), blankToNA)) %>%
  filter(!is.na(session)) %>% # remove own test runs
  filter(!session %in% ownIDs) %>% # remove our own test
  filter(session %in% df.pre$session) %>% # remove anyone who wasn't in the pre
  #filter(!is.na(ended)) %>% # remove all who never finished [disabled because only relevant if data is missing]
  filter(!session %in% .$session[duplicated(.$session)]) %>% # remove all duplicate sessions
  mutate(session = as.character(session)) # factor -> character (precaution)

df.daily <- dtMedical$raw.daily %>%
  mutate(across(everything(), blankToNA)) %>%
  filter(!session %in% ownIDs) %>% # remove our own test
  filter(session %in% df.pre$session) %>% # remove anyone who wasn't in the pre
  #filter(!is.na(ended)) %>% # remove all who never finished [disabled because only relevant if data is missing]
  mutate(session = as.character(session)) # factor -> character (precaution)

# merge daily with pre (inner join: only sessions present in both)
dfPreDaily <- merge(
  x = df.daily,
  y = df.pre,
  by = "session",
  suffixes = c(".daily", ".pre"),
  all = FALSE
)

# merge daily + pre with post (inner join: only sessions present in both)
dfCombined <- merge(
  x = dfPreDaily,
  y = df.post,
  by = "session",
  suffixes = c(".pre", ".post"),
  all = FALSE
)

# add to list
dtMedical$clean <- dfCombined

# clean up workspace
rm(df.pre, df.daily, df.post, dfPreDaily, dfCombined, ownIDs, blankToNA)

Calculate needed transformations

Worker

For the worker sample, the data transformation stage had three main aims:

  1. We first corrected time indicators within the surveys. In some cases participants completed their daily diary surveys for the afternoon after midnight. In these cases the measurement still is in reference to the previous day and is indicated in the corrected variable.
  2. We then created indices of scales. Some indices were multi-item scales while some indices combine equivalent measurement for different situational circumstances (e.g., competence perceptions after interactions and at measurement occasions without interactions).
  3. Finally, we calculated several basic participant summaries (averages across all measurement occasions).
df <- dtWorker$clean

# Time and Date Variables
# Afternoon rows: rebuild the time as characters 4-8 of the recorded value
# plus ":00" (i.e. the seconds are replaced by zero)
isAfternoon <- df$daytime == "afternoon"
df$SubTime[isAfternoon] <-
  paste0(substring(as.character(df$time.0[isAfternoon]), 4, 8), ":00")

# same for time.daily, but only where a value is present
hasAfternoonTime <- isAfternoon & !is.na(df$time.daily)
df$time.daily[hasAfternoonTime] <-
  paste0(substring(as.character(df$time.daily[hasAfternoonTime]), 4, 8), ":00")

# Correct morning / afternoon date where survey was collected the day after to
# indicate the correct date that was targeted.
# which() drops rows whose comparison is NA (e.g. a missing SubTime); with a
# plain logical mask such rows abort the assignment with
# "NAs are not allowed in subscripted assignments".
lateMorning <- which(df$SubTime < "11:50:00" & df$daytime == "morning")
df$PPDate[lateMorning] <- df$PPDate[lateMorning] - 1

lateAfternoon <- which(df$SubTime < "18:50:00" & df$daytime == "afternoon")
df$PPDate[lateAfternoon] <- df$PPDate[lateAfternoon] - 1

rm(isAfternoon, hasAfternoonTime, lateMorning, lateAfternoon)

# Need scales
# Each combined index adds the interaction and the no-interaction version of
# the same item, so the one that was answered carries through.
# rowSums(na.rm = TRUE) returns 0 when *both* items are missing; those rows
# are set back to NA so that a spurious zero cannot enter later averages.
sumScales <- list(
  keyMotiveFulfilled = c("keymotive_fulfillemt_1", "keyMotive_noInt_fulf_1"),
  autonomy.daily.all = c("autonomy_1", "autonomy_NoInt_1"),
  competence.daily.all = c("competence_1", "competence_NoInt_1")
)
for (scaleName in names(sumScales)) {
  scaleItems <- df[, sumScales[[scaleName]]]
  scaleTotal <- rowSums(scaleItems, na.rm = TRUE)
  scaleTotal[rowSums(!is.na(scaleItems)) == 0] <- NA
  df[[scaleName]] <- scaleTotal
}
rm(sumScales, scaleName, scaleItems, scaleTotal)

# cor(df$relatedness_other_1, df$relatedness_self_1,use="complete.obs")
# relatedness: mean of whichever of the three items were answered
df$relatedness.daily.all <- rowMeans(df[, c(
  "relatedness_other_1",
  "relatedness_self_1",
  "relatedness_1_NoInt_1"
)], na.rm = TRUE)

# visual check of the two interaction relatedness items
pairs.panels.new(
  df[c("relatedness_self_1", "relatedness_other_1")],
  labels = c(
    "I shared information about myself.",
    "X shared information about themselves."
  )
)

# relatedness during interactions: mean of the self and other item
df$relatedness_1 <- rowMeans(df[, c("relatedness_other_1", "relatedness_self_1")], na.rm = TRUE)

# Participant-level summaries, attached to every row of that participant
# (mutate, not summarise, so the long format is kept)
between <- df %>%
  group_by(ExternalReference) %>%
  mutate(
    # contact counts (no na.rm: a missing value makes the count NA)
    CtContactNL = sum(Contact_dum),
    CtContactNonNl = sum(inNonDutch),
    CtContactNLAll = sum(number),
    CtContactNonNlAll = sum(NonDutchNum),
    # participant averages across measurement occasions
    AvKeyNeed = mean(keyMotiveFulfilled, na.rm = TRUE),
    AvKeyNeedInt = mean(keymotive_fulfillemt_1, na.rm = TRUE),
    AvKeyNeedNoInt = mean(keyMotive_noInt_fulf_1, na.rm = TRUE),
    AvAutonomy = mean(autonomy.daily.all, na.rm = TRUE),
    AvCompetence = mean(competence.daily.all, na.rm = TRUE),
    AvRelatedness = mean(relatedness.daily.all, na.rm = TRUE),
    AvThermo = mean(thermometerDutch_1, na.rm = TRUE),
    AvWB = mean(ExWB_1, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  # centered (_c) and standardized (_z) versions; computed after ungroup(),
  # i.e. over all rows of all participants
  mutate(
    CtContactNL_c = scale(CtContactNL, scale = FALSE),
    AvKeyNeedInt_c = scale(AvKeyNeedInt, scale = FALSE),
    AvKeyNeed_c = scale(AvKeyNeed, scale = FALSE),
    CtContactNL_z = scale(CtContactNL, scale = TRUE),
    AvKeyNeedInt_z = scale(AvKeyNeedInt, scale = TRUE),
    AvKeyNeed_z = scale(AvKeyNeed, scale = TRUE)
  )

# reminder emitted on every run
warning(
  "some variable transformations (esp. _c and _z) might be across all participants (i.e., not within PP). See next step."
)

dtWorker$full <- between
rm(df, between)

# dataframe where interaction types are recoded as factors
workerInteractionType <- mutate(
  dtWorker$full,
  OutgroupInteraction = as_factor(Contact_dum),
  NonOutgroupInteraction = as_factor(inNonDutch)
)

# Create variables centered and standardized within Participant
# i.e., divide into trait and state
workerWithinBetween <- MlTraitState(
  data = workerInteractionType,
  id = "PID",
  selection = c(
    "keyMotiveFulfilled",
    "competence.daily.all",
    "autonomy.daily.all",
    "relatedness.daily.all",
    "thermometerDutch_1",
    "keymotive_fulfillemt_1",
    "competence_1",
    "autonomy_1",
    "relatedness_1",
    "quality_overall_1",
    "OutgroupInteraction",
    "NonOutgroupInteraction"
  )
)

# same split, restricted to measurements with an outgroup interaction
workerOutWithinBetween <- MlTraitState(
  data = filter(workerInteractionType, OutgroupInteraction == "Yes"),
  id = "PID",
  selection = c(
    "keyMotiveFulfilled",
    "thermometerDutch_1",
    "keymotive_fulfillemt_1",
    "competence_1",
    "autonomy_1",
    "relatedness_1",
    "quality_overall_1"
  )
)


# Between participants contact frequency (one row per participant)
workerContactFreq <- dtWorker$full %>%
  group_by(PID) %>%
  summarise(
    n = n(), # number of measurements
    SumContactNL = sum(Contact_dum),
    PercContactNL = SumContactNL / n * 100,
    SumContactNLAll = sum(number),
    AvAttitude = mean(thermometerDutch_1, na.rm = TRUE)
  ) %>%
  # winsorized versions of the two contact sums
  mutate(
    WinSumContactNL = DescTools::Winsorize(SumContactNL),
    WinSumContactNLAll = DescTools::Winsorize(SumContactNLAll)
  )

# save cleaned data
# save(df.btw, file = "data/processed/df.btw.RData")
# write_sav(df.btw, "data/processed/MT_clean-merged_pre-post.sav")

# export data to Mplus
# df.mplus = remove_all_labels(select(df,
#                                     PID, session,
#                                     thermometerDutch_1, inNonDutch, Contact_dum,
#                                     keyMotiveFulfilled, autonomy.daily.all, competence.daily.all, relatedness.daily.all))
# names(df.mplus)= c("PID", "session", "att", "intin", "intout", "keymot", "aut", "comp", "rel")
# mplus = df.mplus[order(df.mplus$PID, df.mplus$session),]
# mplus.intcont = mplus[mplus$intout==1,]
# prepareMplusData(mplus.intcont, "data/processed/dynamic-subset-intonly.dat")

Student

For the student sample, the data transformation stage had five main aims:

  1. We first create person, survey type, and measurement ID variables.
  2. We then created indices of scales. Some indices were multi-item scales while some indices combine equivalent measurement for different situational circumstances (e.g., competence perceptions after interactions and at measurement occasions without interactions).
  3. We add information about the interaction partner to the beep during which a person was selected as an interaction partner.
  4. We cluster mean-center key variables within participants.
  5. Finally, we calculated several basic participant summaries (averages across all measurement occasions).
df <- dtStudents$clean

# Add ID variables
df$PID <- as.numeric(factor(df$session)) # participant ID

# order time: factor levels follow the order in which the periods first
# appear in the raw daily file
df$TID <-
  factor(df$date_period, levels = unique(dtStudents$raw.daily$date_period))
df$TIDnum <- as.numeric(df$TID) # get numeric TID

# sort by participant and time; the partner lookup further down binds its
# per-participant results back by position and relies on this order
df <- df %>%
  arrange(PID, TID) # %>%
# View()

# Interaction as Factor
df$interaction.f <-
  factor(df$Interaction,
    levels = c("no interaction", "Dutch", "Non-Dutch")
  )
# dummy codes for the two interaction types
df$intNL <- ifelse(df$Interaction == "Dutch", 1, 0)
df$intNonNL <- ifelse(df$Interaction == "Non-Dutch", 1, 0)

# -------------------------------------------------------------------------------------------------------------
#                                       Combine Variables
# -------------------------------------------------------------------------------------------------------------
# Relatedness
pairs.panels.new(
  df[c("RelatednessSelf", "RelatednessOther")],
  labels = c(
    "I shared information about myself.",
    "X shared information about themselves."
  )
)

# rowMeans(na.rm = TRUE) yields NaN when every item of a row is missing;
# is.nan() finds those rows directly instead of comparing against the string
# "NaN"
df$RelatednessInteraction <-
  rowMeans(df[c("RelatednessSelf", "RelatednessOther")], na.rm = TRUE)
df$RelatednessInteraction[is.nan(df$RelatednessInteraction)] <- NA

# Relatedness Overall: interaction or no-interaction value, whichever exists.
# NaN (nothing answered) is turned into NA, as in the medical sample.
df$Relatedness <-
  rowMeans(df[, c("RelatednessInteraction", "RelatednessNoInteraction")],
    na.rm = TRUE
  )
df$Relatedness[is.nan(df$Relatedness)] <- NA

# Pro-Sociality
df$ProSo <-
  rowMeans(df[, c("ProSo1", "ProSo2", "ProSo3", "ProSo4")], na.rm = TRUE)
# Anti-Sociality
df$AntiSo <-
  rowMeans(df[, c("AntiSo1", "AntiSo2", "AntiSo3", "AntiSo4")], na.rm = TRUE)

# -------------------------------------------------------------------------------------------------------------
#                                 Add Variables related to interaction partner
# -------------------------------------------------------------------------------------------------------------
# create function for later lapply
#
# Resolve the interaction partner of every row of one participant's data and
# copy that partner's attributes onto the row.
#
# inp:     data frame with the columns CC, NewCC, NewName, NewCloseness,
#          NewGender, NewEthnicity and NewRelationship.
# returns: data frame with those seven columns plus
#            CC2          - resolved partner name: NewName when CC is 1 or
#                           "SOMEONE ELSE", otherwise CC
#            closeness, gender, ethnicity, relationship
#                         - first non-missing New* value among the rows whose
#                           NewName equals CC2 (NA if there is none)
#          A zero-row input returns a zero-row result.
createIntPartDf <- function(inp) {
  tmp <- data.frame(
    CC = as.character(inp$CC),
    NewCC = as.character(inp$NewCC),
    NewName = as.character(inp$NewName),
    NewCloseness = inp$NewCloseness,
    NewGender = inp$NewGender,
    NewEthnicity = as.character(inp$NewEthnicity),
    NewRelationship = as.character(inp$NewRelationship)
  )

  # Rows coded 1 or "SOMEONE ELSE" take the name from NewName; all other rows
  # keep CC. A missing CC stays missing.
  tmp$CC2 <- ifelse(
    tmp$CC == 1 | tmp$CC == "SOMEONE ELSE",
    as.character(tmp$NewName),
    as.character(tmp$CC)
  )

  # strip one leading whitespace character so that names can be matched
  tmp$CC2 <- gsub("^[[:space:]]", "", tmp$CC2)
  tmp$NewName <- gsub("^[[:space:]]", "", tmp$NewName)

  # result columns, filled row by row below
  tmp$closeness <- rep(NA, nrow(tmp))
  tmp$gender <- rep(NA, nrow(tmp))
  tmp$ethnicity <- rep(NA, nrow(tmp))
  tmp$relationship <- rep(NA, nrow(tmp))

  # seq_len() instead of 1:nrow() so that an empty data frame is skipped
  for (i in seq_len(nrow(tmp))) {
    if (is.na(tmp$CC2[i])) {
      next
    }
    # rows in which this partner was entered (NA where NewName is missing;
    # na.omit() below discards the values those NA indices produce)
    isPartner <- as.character(tmp$CC2[i]) == as.character(tmp$NewName)

    # na.omit(...)[1]: first non-missing value, or NA if nothing is left
    tmp$closeness[i] <- na.omit(tmp$NewCloseness[isPartner])[1]
    tmp$gender[i] <- na.omit(tmp$NewGender[isPartner])[1]
    tmp$ethnicity[i] <-
      na.omit(as.character(tmp$NewEthnicity[isPartner]))[1]
    tmp$relationship[i] <-
      na.omit(as.character(tmp$NewRelationship[isPartner]))[1]
  }

  tmp
}

# run the partner lookup separately for every participant
partnerInfo <- lapply(split(df, df$PID), createIntPartDf)
rm(createIntPartDf)

# stack the per-participant results, mark the columns with "_Calc" and bind
# them to df by position (this relies on df being sorted by PID, see the
# arrange() call above)
partnerInfo <- do.call(rbind.data.frame, partnerInfo)
colnames(partnerInfo) <- paste0(colnames(partnerInfo), "_Calc")
df <- cbind(df, partnerInfo)
rm(partnerInfo)

# -------------------------------------------------------------------------------------------------------------
#                                 Center Relevant Variables
# -------------------------------------------------------------------------------------------------------------

# .cm  = cluster mean (mean of the participant)
# .cwc = cluster-mean centered value (deviation from the participant's mean)
df <- df %>%
  group_by(PID) %>%
  mutate(
    KeyNeedFullfillment.cm = mean(KeyNeedFullfillment, na.rm = TRUE),
    KeyNeedFullfillment.cwc = KeyNeedFullfillment - KeyNeedFullfillment.cm,
    closeness.cm = mean(closeness_Calc, na.rm = TRUE),
    closeness.cwc = closeness_Calc - closeness.cm
  ) %>%
  ungroup()

# store
dtStudents$full <- df
rm(df)

# Between participants contact frequency (one row per participant)
studentContactFreq <- dtStudents$full %>%
  group_by(PID) %>%
  summarise(
    n = n(), # number of measurements
    SumContactNL = sum(InteractionDumDutch),
    PercContactNL = SumContactNL / n * 100,
    # ContactNum summed over the measurements with a Dutch interaction
    SumContactNLAll = sum(ContactNum[InteractionDumDutch == 1], na.rm = TRUE),
    AvAttitude = mean(AttitudesDutch, na.rm = TRUE),
    AvQuality = mean(quality_overall, na.rm = TRUE)
  ) %>%
  # winsorized versions of the two contact sums
  mutate(
    WinSumContactNL = DescTools::Winsorize(SumContactNL),
    WinSumContactNLAll = DescTools::Winsorize(SumContactNLAll)
  )

# dataframe where interaction types are recoded
studentInteractionType <- dtStudents$full %>%
  mutate(
    NonDutchContact = tidyr::replace_na(NonDutchContact, 2), # missing counts as "no" (2)
    NonDutchContact = NonDutchContact * -1 + 2 # recode (yes = 1 -> 1, no = 2 -> 0)
  ) %>%
  # second mutate() on purpose: `.` below must already contain the recoded
  # NonDutchContact
  mutate(
    OutgroupInteraction = factor(
      InteractionDumDutch,
      levels = c(0, 1),
      labels = c("No", "Yes")
    ),
    # Combine the two non-Dutch questions. The sum is capped at 1: when both
    # indicators are 1 the raw sum is 2, which is not among the factor levels
    # and used to become NA instead of "Yes".
    NonOutgroupInteraction = factor(
      pmin(rowSums(select(., c(InteractionDumNonDutch, NonDutchContact))), 1),
      levels = c(0, 1),
      labels = c("No", "Yes")
    )
  )

# select a subset of IDs to display in plots: the 20 participants with the
# most measurements (slice_max keeps ties, so more than 20 are possible)
studentPltIDs <- studentInteractionType %>%
  group_by(PID) %>%
  summarise(n = n()) %>%
  slice_max(n, n = 20) %>%
  pull(PID)

# same selection, counting only measurements with an outgroup interaction
studentOutPltIDs <- studentInteractionType %>%
  filter(OutgroupInteraction == "Yes") %>%
  group_by(PID) %>%
  summarise(n = n()) %>%
  slice_max(n, n = 20) %>%
  pull(PID)

# Center within and between
# divide into trait and state
studentWithinBetween <- MlTraitState(
  data = studentInteractionType,
  id = "PID",
  selection = c(
    "KeyNeedFullfillment",
    "Competence",
    "Autonomy",
    "Relatedness",
    "AttitudesDutch",
    "quality_overall",
    "OutgroupInteraction",
    "NonOutgroupInteraction"
  )
)

# same split, restricted to measurements with an outgroup interaction
studentOutWithinBetween <- MlTraitState(
  data = filter(studentInteractionType, OutgroupInteraction == "Yes"),
  id = "PID",
  selection = c(
    "KeyNeedFullfillment",
    "Competence",
    "Autonomy",
    "Relatedness",
    "AttitudesDutch",
    "quality_overall"
  )
)

Medical

For the medical professional sample, the data transformation stage had four main aims:

  1. We first create person, survey type, and measurement ID variables.
  2. We then created indices of scales. Some indices were multi-item scales while some indices combine equivalent measurement for different situational circumstances (e.g., competence perceptions after interactions and at measurement occasions without interactions).
  3. We cluster mean-center key variables within participants.
  4. Finally, we calculated several basic participant summaries (averages across all measurement occasions).
df <- dtMedical$clean

# Add ID variables
df$PID <- as.numeric(factor(df$session)) # participant ID

# order time: factor levels follow the order in which the periods first
# appear in the raw daily file
df$TID <-
  factor(df$date_period, levels = unique(dtMedical$raw.daily$date_period))
df$TIDnum <- as.numeric(df$TID) # get numeric TID

# sort by participant and time
df <- df %>%
  arrange(PID, TID) # %>%
# View()

# Interaction as Factor
df$interaction.f <-
  factor(df$Interaction,
    levels = c("no interaction", "Dutch", "Non-Dutch")
  )
# dummy codes for the two interaction types
df$intNL <- ifelse(df$Interaction == "Dutch", 1, 0)
df$intNonNL <- ifelse(df$Interaction == "Non-Dutch", 1, 0)

df <- df %>%
  mutate(
    NonDutchContact = replace_na(NonDutchNum, 0), # missing count -> 0
    # 1 when more than one non-Dutch contact was reported, 0 otherwise.
    # NOTE(review): `> 1` codes exactly one reported contact as 0 -- confirm
    # whether `> 0` was intended.
    NonDutchContact = ifelse(NonDutchContact > 1, 1, 0)
  ) %>%
  # second mutate() on purpose: `.` below must already contain NonDutchContact
  mutate(
    OutgroupInteraction = factor(
      InteractionDumDutch,
      levels = c(0, 1),
      labels = c("No", "Yes")
    ),
    # Combine the two non-Dutch questions. The sum is capped at 1: when both
    # indicators are 1 the raw sum is 2, which is not among the factor levels
    # and used to become NA instead of "Yes".
    NonOutgroupInteraction = factor(
      pmin(
        rowSums(select(., c(InteractionDumNonDutch, NonDutchContact)), na.rm = TRUE),
        1
      ),
      levels = c(0, 1),
      labels = c("No", "Yes")
    )
  )



# -------------------------------------------------------------------------------------------------------------
#                                       Combine Variables
# -------------------------------------------------------------------------------------------------------------
# Relatedness
# Association between the two self-disclosure items
pairs.panels.new(
  df[, c("RelatednessSelf", "RelatednessOther")],
  labels = c(
    "I shared information about myself.",
    "X shared information about themselves."
  )
)

# Interaction-specific relatedness: row mean of the two items; rows where
# both items are missing yield NaN and are set to NA
df$RelatednessInteraction <-
  rowMeans(df[, c("RelatednessSelf", "RelatednessOther")], na.rm = TRUE)
df$RelatednessInteraction[is.nan(df$RelatednessInteraction)] <- NA

# Overall relatedness: row mean of the interaction and no-interaction
# measures, again with NaN (nothing observed) replaced by NA
df$Relatedness <-
  rowMeans(
    df[, c("RelatednessInteraction", "RelatednessNoInteraction")],
    na.rm = TRUE
  )
df$Relatedness <- ifelse(is.nan(df$Relatedness), NA, df$Relatedness)


# Split the key-need ratings by situation: "Daytime" variables hold the rating
# on occasions without an interaction (InteractionDum == 0), "Interaction"
# variables hold it on occasions with an interaction (InteractionDum == 1).
# All other rows are NA. `%in%` treats a missing InteractionDum as "no match",
# so NA flags cannot break the assignment.
#
# Fix: DaytimeNeedImportance previously read `df$DaytimeNeedFulfillment`
# (a misspelled column that is not created anywhere in the visible code) under
# the InteractionDum == 1 filter. It now takes KeyNeedImp on no-interaction
# occasions, mirroring the fulfillment split.
# NOTE(review): confirm KeyNeedImp is the intended importance source for
# no-interaction occasions.
# The column spelling "Fullfillment" is kept because later code selects these
# columns by that name.
df$DaytimeNeedFullfillment <-
  replace(df$KeyNeedFulfillment, !(df$InteractionDum %in% 0), NA)

df$DaytimeNeedImportance <-
  replace(df$KeyNeedImp, !(df$InteractionDum %in% 0), NA)


df$InteractionNeedFullfillment <-
  replace(df$KeyNeedFulfillment, !(df$InteractionDum %in% 1), NA)

df$InteractionNeedImportance <-
  replace(df$KeyNeedImp, !(df$InteractionDum %in% 1), NA)


# Pro-sociality index: row mean of ProSo1-ProSo4, ignoring missing items
df$ProSo <- df %>%
  select(ProSo1, ProSo2, ProSo3, ProSo4) %>%
  rowMeans(na.rm = TRUE)
# Anti-sociality index: row mean of AntiSo1-AntiSo4, ignoring missing items
# NOTE(review): the plotting variable list further down also names
# AntiSo5-AntiSo7; confirm that only the first four items belong in the index.
df$AntiSo <- df %>%
  select(AntiSo1, AntiSo2, AntiSo3, AntiSo4) %>%
  rowMeans(na.rm = TRUE)

# Allport's Conditions: pairwise association plot of the four items
pairs.panels.new(
  select(
    df,
    InteractionContextEqualStatus,
    KeyNeedShared,
    InteractionContextCooperative,
    InteractionContextvoluntary
  )
)

# Item-level descriptives (n, mean, sd, range, se) rendered as a styled table
select(
  df,
  InteractionContextEqualStatus,
  KeyNeedShared,
  InteractionContextCooperative,
  InteractionContextvoluntary
) %>%
  psych::describe(skew = FALSE, ranges = TRUE) %>%
  as.data.frame() %>%
  select(-vars) %>%
  kable(caption = "Descriptives of Allport's Condition items") %>%
  kable_styling("hover", full_width = FALSE, latex_options = "hold_position")
Table 1: Descriptives of Allport’s Condition items
n mean sd min max range se
InteractionContextEqualStatus 3099 81.84 23.58 0 100 100 0.4236
KeyNeedShared 3110 84.90 18.74 0 100 100 0.3360
InteractionContextCooperative 3099 85.67 18.35 0 100 100 0.3296
InteractionContextvoluntary 3099 84.14 22.28 0 100 100 0.4002
# Item set used for the scale analyses of Allport's conditions
iaWorkerAllport <- select(
  df,
  InteractionContextEqualStatus,
  KeyNeedShared,
  InteractionContextCooperative,
  InteractionContextvoluntary
)

# Item analysis table for the item set
sjPlot::tab_itemscale(iaWorkerAllport)
Component 1
Missings Mean SD Skew Item Difficulty Item Discrimination α if deleted
24.54 % 81.84 23.58 -1.43 0.82 0.52 0.64
24.28 % 84.9 18.74 -1.78 0.85 0.42 0.69
24.54 % 85.67 18.35 -1.55 0.86 0.60 0.59
24.54 % 84.14 22.28 -1.7 0.84 0.47 0.67
Mean inter-item-correlation=0.386 · Cronbach’s α=0.709
# Principal components of the Allport items; each item is then assigned to
# its closest component and the item analysis is repeated per component
pca <- parameters::principal_components(x = iaWorkerAllport)
factor.groups <- parameters::closest_component(pca)

sjPlot::tab_itemscale(iaWorkerAllport, factor.groups = factor.groups)
Component 1
Missings Mean SD Skew Item Difficulty Item Discrimination α if deleted
24.54 % 81.84 23.58 -1.43 0.82 0.52 0.64
24.28 % 84.9 18.74 -1.78 0.85 0.42 0.69
24.54 % 85.67 18.35 -1.55 0.86 0.60 0.59
24.54 % 84.14 22.28 -1.7 0.84 0.47 0.67
Mean inter-item-correlation=0.386 · Cronbach’s α=0.709
# Cronbach's alpha with a bootstrap confidence interval (CI = TRUE), computed
# on the rows that have all four Allport items observed (na.omit)
ltm::cronbach.alpha(na.omit(iaWorkerAllport), CI = TRUE)
## 
## Cronbach's alpha for the 'na.omit(iaWorkerAllport)' data-set
## 
## Items: 4
## Sample units: 3099
## alpha: 0.709
## 
## Bootstrap 95% CI based on 1000 samples
##  2.5% 97.5% 
## 0.685 0.730
# Long-format item data (complete cases only) for the nested reliability
# estimate: one row per participant x time point x item
data <- melt(
  drop_na(
    select(
      df,
      PID,
      TIDnum,
      InteractionContextEqualStatus,
      KeyNeedShared,
      InteractionContextCooperative,
      InteractionContextvoluntary
    )
  ),
  id.vars = c("PID", "TIDnum")
)

# Alpha with item responses nested in time points nested in participants
horst::nestedAlpha(
  data         = data,
  item.level.1 = "value",
  level.2      = "TIDnum",
  level.3      = "PID"
)
##  alpha 
## 0.7829
# remove the temporary long-format data set
rm(data)

# Item analysis object for the Allport item set
iaWorkerAllportScale <-
  iaWorkerAllport %>%
  Scale::Scale() %>%
  Scale::ItemAnalysis()

# Allport's conditions index: mean of the four items (all positively keyed,
# response range 0-100).
#
# Fix: scoreItems() imputes missing responses with the item median by default,
# so measurement occasions on which none of the items were answered still
# received a score (the items show roughly 25% missing, the index none).
# The index is now based on observed responses only (impute = "none"), and
# rows without any observed item are set to NA explicitly.
allportItems <- df %>%
  select(
    InteractionContextEqualStatus,
    KeyNeedShared,
    InteractionContextCooperative,
    InteractionContextvoluntary
  )

allportScores <-
  scoreItems(
    keys = c(1, 1, 1, 1),
    items = allportItems,
    min = 0,
    max = 100,
    impute = "none"
  )$scores
allportScores[rowSums(!is.na(allportItems)) == 0] <- NA

# store as a plain numeric vector rather than a one-column matrix
df$AllportsCondition <- as.numeric(allportScores)
rm(allportItems, allportScores)

# Descriptives of the Allport's conditions index as a styled one-row table
df$AllportsCondition %>%
  psych::describe(skew = TRUE) %>%
  as.data.frame() %>%
  mutate(vars = "Allport's Conditions Index") %>%
  kable(caption = "Allport's Conditions: Scale Descriptives", row.names = FALSE) %>%
  kable_styling("hover", full_width = FALSE, latex_options = "hold_position")
Table 1: Allport’s Conditions: Scale Descriptives
vars n mean sd median trimmed mad min max range skew kurtosis se
Allport’s Conditions Index 4107 86.49 13.88 93.75 88.6 9.266 0 100 100 -1.454 2.406 0.2165
# Distribution of the Allport's conditions index
ggplot(data = df, mapping = aes(x = AllportsCondition)) +
  geom_histogram()

# -------------------------------------------------------------------------------------------------------------
#                                 Add Variables related to interaction partner
# -------------------------------------------------------------------------------------------------------------
# Build the interaction-partner variables for the rows of ONE participant
# (used with lapply over the per-participant split).
#
# Argument:
#   inp  data frame with the columns CC, NewCC, NewName, NewCloseness,
#        NewGender, NewEthnicity and NewRelationship.
# Returns a data frame with one row per row of `inp`, holding the (character)
# input columns plus
#   CC2           resolved partner name: NewName when CC is 1 or
#                 "SOMEONE ELSE", otherwise CC itself
#   closeness, gender, ethnicity, relationship
#                 the first non-missing value recorded on a row whose NewName
#                 equals CC2 (NA when CC2 is NA or nothing was recorded)
#
# Changes compared with the previous version (results are unchanged for
# non-empty input):
#   * the row-wise loop over 1:nrow() is replaced by a vectorised lookup, so
#     empty input no longer raises an error;
#   * the intermediate recode of "SOMEONE ELSE" to the string "NA" is dropped;
#     it was always overwritten by NewName in the next step.
createIntPartDf <- function(inp) {
  tmp <- data.frame(
    CC = as.character(inp$CC),
    NewCC = as.character(inp$NewCC),
    NewName = as.character(inp$NewName),
    NewCloseness = inp$NewCloseness,
    NewGender = inp$NewGender,
    NewEthnicity = as.character(inp$NewEthnicity),
    NewRelationship = as.character(inp$NewRelationship)
  )

  # Resolve the partner name; a missing CC yields a missing CC2
  isNewPartner <- tmp$CC == 1 | tmp$CC == "SOMEONE ELSE"
  tmp$CC2 <- ifelse(isNewPartner, tmp$NewName, tmp$CC)

  # Remove a single leading whitespace character so that names can match
  # NOTE(review): trailing or repeated whitespace is still not normalised.
  tmp$CC2 <- gsub("^[[:space:]]", "", tmp$CC2)
  tmp$NewName <- gsub("^[[:space:]]", "", tmp$NewName)

  # For every row, return the first non-missing entry of `values` among the
  # rows whose NewName equals that row's CC2
  firstKnown <- function(values) {
    usable <- !is.na(values) & !is.na(tmp$NewName)
    values[usable][match(tmp$CC2, tmp$NewName[usable])]
  }

  tmp$closeness <- firstKnown(tmp$NewCloseness)
  tmp$gender <- firstKnown(tmp$NewGender)
  tmp$ethnicity <- firstKnown(as.character(tmp$NewEthnicity))
  tmp$relationship <- firstKnown(as.character(tmp$NewRelationship))

  tmp
}

# Apply the partner lookup separately to each participant's rows
PP <- lapply(split(df, df$PID), createIntPartDf)
rm(createIntPartDf)

# Stack the per-participant results, mark the new columns with a "_Calc"
# suffix and attach them to df column-wise.
# NOTE(review): cbind() aligns by row position, so this relies on df being
# sorted by PID (it is arranged by PID and TID earlier in this section).
remergePP <- do.call(rbind.data.frame, PP)
colnames(remergePP) <- paste0(colnames(remergePP), "_Calc")
df <- cbind(df, remergePP)
rm(remergePP, PP)

# -------------------------------------------------------------------------------------------------------------
#                                 Center Relevant Variables
# -------------------------------------------------------------------------------------------------------------
# Trait/state decomposition via MlTraitState(), grouped by participant (PID).

# (1) occasions with an outgroup interaction only
medicalOutWithinBetween <- MlTraitState(
  data = filter(df, OutgroupInteraction == "Yes"),
  id = "PID",
  selection = c(
    "KeyNeedFulfillment", "Competence", "Autonomy", "Relatedness",
    "AllportsCondition", "AttitudesDutch", "qualityOverall"
  )
)

# (2) all occasions, additionally including the two interaction indicators
medicalWithinBetween <- MlTraitState(
  data = df,
  id = "PID",
  selection = c(
    "KeyNeedFulfillment", "Competence", "Autonomy", "Relatedness",
    "AllportsCondition", "AttitudesDutch", "qualityOverall",
    "OutgroupInteraction", "NonOutgroupInteraction"
  )
)

# (3) all occasions, result written back to df; kept only so that older
# code fragments that expect these variables in df keep working
df <- MlTraitState(
  data = df,
  id = "PID",
  selection = c(
    "KeyNeedFulfillment", "Competence", "Autonomy", "Relatedness",
    "AllportsCondition", "AttitudesDutch", "qualityOverall"
  )
)

# Store the fully transformed data and drop the working copy
dtMedical$full <- df
rm(df)


# Per-participant contact summaries: number of measurement occasions, number
# and percentage of occasions with a Dutch interaction, summed ContactNum on
# those occasions, mean outgroup attitude and mean interaction quality;
# winsorized versions of the two contact sums are added afterwards
medicalContactFreq <-
  mutate(
    summarise(
      group_by(dtMedical$full, PID),
      n = n(),
      SumContactNL = sum(InteractionDumDutch, na.rm = TRUE),
      PercContactNL = SumContactNL / n * 100,
      SumContactNLAll = sum(ContactNum[InteractionDumDutch == 1], na.rm = TRUE),
      AvAttitude = mean(AttitudesDutch, na.rm = TRUE),
      AvQuality = mean(qualityOverall, na.rm = TRUE)
    ),
    WinSumContactNL = DescTools::Winsorize(SumContactNL),
    WinSumContactNLAll = DescTools::Winsorize(SumContactNLAll)
  )

# IDs shown in plots: the 20 participants with the most measurement
# occasions (ties at the cut-off are kept by slice_max)
medicalPltIDs <-
  pull(
    slice_max(
      summarise(group_by(dtMedical$full, PID), n = n()),
      n,
      n = 20
    ),
    PID
  )

# Same selection, counting only occasions with an outgroup interaction
medicalOutPltIDs <-
  pull(
    slice_max(
      summarise(
        group_by(filter(dtMedical$full, OutgroupInteraction == "Yes"), PID),
        n = n()
      ),
      n,
      n = 20
    ),
    PID
  )

Data Availability and Time Scales

# Worker study: one row per observed participant x time point, flagged with
# available = 1
dtworkerMissing <- mutate(
  select(dtWorker$full, PID, Finished.daily, PPDate, daytime, TID),
  date_period = paste(PPDate, daytime, sep = " "),
  available = 1
)

library(reshape)
# Wide availability matrix (participants in rows, time points in columns),
# exported as CSV
dtworkerAvailability <- reshape::cast(data = dtworkerMissing, formula = PID ~ TID)
write.csv(
  dtworkerAvailability,
  file = "data/S1_Workers/processed/workerAvailability.csv"
)
# Student study: one row per observed participant x time point, with a
# numeric time index and available = 1
dtstudentMissing <- mutate(
  select(dtStudents$full, PID, date.daily, periodMA, date_period, TID),
  TID = as.numeric(TID),
  available = 1
)

# Wide availability matrix (participants in rows, time points in columns),
# exported as CSV
dtstudentAvailability <- reshape::cast(data = dtstudentMissing, formula = PID ~ TID)
write.csv(
  dtstudentAvailability,
  file = "data/S2_Students/processed/studentAvailability.csv"
)
# Medical study: one row per observed participant x time point, with a
# numeric time index and available = 1
dtMedicalMissing <- mutate(
  select(dtMedical$full, PID, date.daily, periodMA, date_period, TID),
  TID = as.numeric(TID),
  available = 1
)

# Wide availability matrix (participants in rows, time points in columns),
# exported as CSV
dtMedicalAvailability <- reshape::cast(data = dtMedicalMissing, formula = PID ~ TID)
write.csv(
  dtMedicalAvailability,
  file = "data/S3_Medical/processed/medicalAvailability.csv"
)

Mean Plots per Item

Study 1 (Worker):

Study 2 (Student):

Study 3 (Medical):

# Columns of the medical sample that are visualised in the mean plots.
# "KeyNeedFulfillment" and "KeyNeedImp" are deliberately left out; their
# daytime / interaction splits are used instead.
varListMedical <- c(
  # core motive, split by situation
  "DaytimeNeedFullfillment", "DaytimeNeedImportance",
  "InteractionNeedFullfillment", "InteractionNeedImportance",
  # anti- and pro-social behaviour items
  paste0("AntiSo", 1:7),
  paste0("ProSo", 1:4),
  # goal items
  sprintf("StudentGoal%02d", 1:10),
  # self-determination needs
  # (relatedness could be broken up into interaction vs. no interaction;
  # those variables are already calculated)
  "Autonomy", "Competence", "Relatedness",
  # attitudes
  "AttitudesDutch", "AttitudesPartner",
  # well-being, emotions and events
  "exWB", "angry", "afraid", "energy", "lonelyAlways", "Event"
)

# Display labels for the visualised columns (names = column names,
# values = labels shown in plot legends).
# Spelling errors in the labels were corrected ("someone's", "Improperly",
# "Leisure"); the column names are unchanged.
varNameListMedical <-
  c(
    # "KeyNeedFulfillment" = "Key Need Fulfillment",
    # "KeyNeedImp" = "Key Need Importance",
    "DaytimeNeedFullfillment" = "Daytime Core Motive Fulfillment",
    "DaytimeNeedImportance" = "Daytime Core Motive Importance",
    "InteractionNeedFullfillment" = "Interaction Core Motive Fulfillment",
    "InteractionNeedImportance" = "Interaction Core Motive Importance",
    "AntiSo1" = "Put someone down",
    "AntiSo2" = "Show little attention in someone's opinion",
    "AntiSo3" = "Demeaning remarks",
    "AntiSo4" = "Improperly addressing someone",
    "AntiSo5" = "Ignored or excluded someone",
    "AntiSo6" = "Doubt someone's judgement",
    "AntiSo7" = "Unwanted attempts of personal matters",
    "ProSo1" = "Listen to someone's problems",
    "ProSo2" = "Cheer someone up",
    "ProSo3" = "Help someone get things done",
    "ProSo4" = "Help someone with responsibilities",
    "StudentGoal01" = "Social support and connectedness",
    "StudentGoal02" = "Romantic or sexual relationship",
    "StudentGoal03" = "Academic",
    "StudentGoal04" = "Career",
    "StudentGoal05" = "Financial",
    "StudentGoal06" = "Health and fitness",
    "StudentGoal07" = "Leisure and fun",
    "StudentGoal08" = "Personal improvement and growth",
    "StudentGoal09" = "Service and help",
    "StudentGoal10" = "Spiritual or religious",
    "Autonomy" = "Autonomy",
    "Competence" = "Competence",
    # Break relatedness up into interaction vs. no interaction(?)
    # vars are already calculated
    "Relatedness" = "Relatedness",
    "AttitudesDutch" = "Outgroup Attitude",
    "AttitudesPartner" = "Attitude Interaction Partner",
    "exWB" = "Sadness Happiness",
    "angry" = "Anger",
    "afraid" = "Anxiety",
    "energy" = "Energy",
    "lonelyAlways" = "Loneliness",
    "Event" = "Positive or negative event present"
  )

# Container for the per-topic visualisation data sets
dtMedical$viz <- list()

# All visualised variables plus the participant and time identifiers.
# all_of() marks varListMedical as an external character vector of column
# names: the selection can no longer be confused with a column of that name,
# and a column that is missing from the data raises an error.
dtMedical$vizall <- dtMedical$full %>%
  select(
    PID,
    TID,
    TIDnum,
    all_of(varListMedical)
  )

# Per-topic visualisation data sets. Each keeps the identifiers
# (PID, TID, TIDnum) plus a small group of related variables.

# core motive (daytime / interaction)
dtMedical$viz$DaytimeNeed <-
  select(dtMedical$vizall, PID, TID, TIDnum, starts_with("DaytimeNeed"))

dtMedical$viz$InteractionNeed <-
  select(dtMedical$vizall, PID, TID, TIDnum, starts_with("InteractionNeed"))

# anti-social behaviour items
dtMedical$viz$AntiSocialBehaviorPt1 <-
  select(dtMedical$vizall, PID, TID, TIDnum, AntiSo1, AntiSo2, AntiSo3)

dtMedical$viz$AntiSocialBehaviorPt2 <-
  select(dtMedical$vizall, PID, TID, TIDnum, AntiSo4, AntiSo5)

dtMedical$viz$AntiSocialBehaviorPt3 <-
  select(dtMedical$vizall, PID, TID, TIDnum, AntiSo6, AntiSo7)

# pro-social behaviour items
dtMedical$viz$ProSocialBehaviorPt1 <-
  select(dtMedical$vizall, PID, TID, TIDnum, ProSo1, ProSo2)

dtMedical$viz$ProSocialBehaviorPt2 <-
  select(dtMedical$vizall, PID, TID, TIDnum, ProSo3, ProSo4)

# goal items, two per data set
dtMedical$viz$GoalsPt1 <-
  select(dtMedical$vizall, PID, TID, TIDnum, StudentGoal01, StudentGoal02)

dtMedical$viz$GoalsPt2 <-
  select(dtMedical$vizall, PID, TID, TIDnum, StudentGoal03, StudentGoal04)

dtMedical$viz$GoalsPt3 <-
  select(dtMedical$vizall, PID, TID, TIDnum, StudentGoal05, StudentGoal06)

dtMedical$viz$GoalsPt4 <-
  select(dtMedical$vizall, PID, TID, TIDnum, StudentGoal07, StudentGoal08)

dtMedical$viz$GoalsPt5 <-
  select(dtMedical$vizall, PID, TID, TIDnum, StudentGoal09, StudentGoal10)

# self-determination needs
dtMedical$viz$SDT <-
  select(dtMedical$vizall, PID, TID, TIDnum, Autonomy, Relatedness, Competence)

# attitudes
dtMedical$viz$Attitudes <-
  select(dtMedical$vizall, PID, TID, TIDnum, starts_with("Attitudes"))

# well-being
dtMedical$viz$WB <-
  select(dtMedical$vizall, PID, TID, TIDnum, exWB)

# emotions
dtMedical$viz$Emotion <-
  select(dtMedical$vizall, PID, TID, TIDnum, angry, afraid, energy, lonelyAlways)

# events
dtMedical$viz$Event <-
  select(dtMedical$vizall, PID, TID, TIDnum, Event)


# Reshape one wide visualisation data set into long format: the identifier
# columns PID, TID and TIDnum are kept, every other column is stacked into
# `variable` / `value`.
#
# Fix: the function used to be defined twice. The first definition, which was
# the one applied to build dtMedical$vizLong, omitted "TID" from the id
# columns, so the TID factor was stacked into `value` together with the
# measured variables. A single definition with all three identifiers is now
# used throughout.
#
# Arguments:
#   data  data frame containing PID, TID, TIDnum and the measure columns
#   ...   currently unused
lWideToLong <- function(data, ...) {
  reshape2::melt(
    data,
    id.vars = c("PID", "TID", "TIDnum")
  )
}

# Long-format version of every visualisation data set
dtMedical$vizLong <- lapply(
  dtMedical$viz,
  lWideToLong
)


# Plot the mean trajectory of a set of variables over time, with a data
# availability panel and a title stacked on top.
#
# Arguments:
#   dataIn   wide data frame with PID, TID, TIDnum and one column per variable
#            (e.g. dtMedical$viz$InteractionNeed); the TID values must become
#            parseable date-times once "Morning" / "Afternoon" are replaced by
#            a time of day
#   name     label of the variable set, shown in the plot title
#   namList  named character vector mapping column names to display labels
#            (e.g. varNameListMedical)
#   ...      currently unused
#   output   which plot to return: "tid" (default, x axis = numeric time
#            index), "date" (x axis = calendar date) or "both"
#            (list(tidPlot, datePlot))
#
# Returns a cowplot grid for "tid" or "date", or a list of both grids.
#
# Changes compared with the previous version:
#   * the date plot title used the hard-coded set name "Key-Need"; it now
#     uses `name` like the time-index plot;
#   * the date plot is only built when requested (it was always built and
#     then discarded); the default return value is unchanged;
#   * the availability panel of the date plot now counts non-missing values
#     only, as the time-index panel already did;
#   * `vectorize_all` is spelled out (was matched partially via `vectorize`);
#   * a `color = variable` argument placed outside aes() in the time-index
#     availability panel was silently ignored and has been removed, so that
#     panel renders as before;
#   * the deprecated `..count..` notation is replaced by after_stat().
meanVarPlot <- function(dataIn, name, namList, ..., output = c("tid", "date", "both")) {
  output <- match.arg(output)

  # Long format with TID converted to a date-time (morning = 12:00,
  # afternoon = 19:00) and variables relabelled for the legend
  dataLong <- dataIn %>%
    mutate(TID = stri_replace_all_regex(
      TID,
      pattern = c("Morning", "Afternoon"),
      replacement = c("12:00:00", "19:00:00"),
      vectorize_all = FALSE
    ) %>% as.POSIXct()) %>%
    lWideToLong() %>%
    mutate(variable = recode(variable, !!!namList))

  # Rows with an observed value; basis of the availability panels
  dataObserved <- dataLong %>% filter(!is.na(value))

  # Layers shared by the two mean panels
  meanLayers <- function(xLabel) {
    list(
      geom_point(alpha = 0),
      stat_summary(fun = mean, geom = "line"),
      labs(y = "Mean per Day", x = xLabel),
      ggthemes::scale_colour_calc(),
      theme_Publication(),
      theme(
        panel.border = element_rect(colour = "black"),
        plot.margin = margin(0, 10, 10, 10, "mm")
      )
    )
  }

  # Layers shared by the two availability panels: dashed 80% reference line
  # with a label placed at `xMax`, no x axis
  availabilityLayers <- function(xMax) {
    list(
      geom_hline(yintercept = 80, linetype = "dashed", color = "black"),
      annotate(
        # add white background
        "label",
        x = xMax,
        y = 80,
        label = "80%",
        vjust = 0.5,
        hjust = -1,
        label.size = NA
      ),
      coord_cartesian(clip = "off"),
      labs(y = "Percentage"),
      theme_classic(),
      theme(
        legend.position = "none",
        axis.line.x = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        plot.margin = margin(0, 10, 0, 10, "mm")
      )
    )
  }

  # Stack title, availability panel and mean panel vertically
  stackPanels <- function(titleSuffix, top, main) {
    title <- cowplot::ggdraw() +
      cowplot::draw_label(
        paste("Variable Set:", name, titleSuffix),
        fontface = "bold",
        x = 0.5,
        hjust = 0.5
      )
    cowplot::plot_grid(
      title,
      top,
      main,
      align = "v",
      nrow = 3,
      rel_heights = c(0.1, 1 / 5, 4 / 5)
    )
  }

  # Plot against the numeric time index
  buildTidPlot <- function() {
    pTidMain <-
      ggplot(dataLong, aes(x = TIDnum, y = value, color = variable)) +
      meanLayers("Time Index") +
      scale_x_continuous(breaks = seq(0, max(dataLong$TIDnum), 25))
    pTidTop <-
      ggplot(
        dataObserved,
        aes(x = TIDnum, y = after_stat(count / max(count) * 100))
      ) +
      geom_freqpoly(binwidth = 2) +
      availabilityLayers(max(dataLong$TIDnum))
    stackPanels("[Time Id Plot]", pTidTop, pTidMain)
  }

  # Plot against the calendar date
  buildDatePlot <- function() {
    pDateMain <-
      ggplot(dataLong, aes(x = TID, y = value, color = variable)) +
      meanLayers("Date") +
      scale_x_datetime(breaks = scales::date_breaks("10 day"))
    pDateTop <-
      ggplot(
        dataObserved,
        aes(
          x = TID,
          y = after_stat(count / max(count) * 100),
          color = variable
        )
      ) +
      geom_freqpoly(binwidth = 1 * 3600 * 24) + # one-day bins (in seconds)
      availabilityLayers(max(unique(dataLong$TID))) +
      scale_colour_manual(values = rep("black", 20))
    stackPanels("[Date Plot]", pDateTop, pDateMain)
  }

  switch(output,
    tid = buildTidPlot(),
    date = buildDatePlot(),
    both = list(buildTidPlot(), buildDatePlot())
  )
}

Software Information

The full session information with all relevant system information and all loaded and installed packages is available in the collapsible section below.

System Info
Table 2: R environment session info for reproducibility of results
Setting Value
version R version 4.1.1 (2021-08-10)
os macOS Big Sur 10.16
system x86_64, darwin17.0
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz Europe/Amsterdam
date 2022-05-05

Package Info
Table 3: Package info for reproducibility of results
Package Loaded version Date Source
bookdown 0.24 2021-09-02 CRAN (R 4.1.0)
brms 2.16.1 2021-08-23 CRAN (R 4.1.0)
data.table 1.14.0 2021-02-21 CRAN (R 4.1.0)
devtools 2.4.2 2021-06-07 CRAN (R 4.1.0)
dplyr 1.0.9 2022-04-28 CRAN (R 4.1.2)
dygraphs 1.1.1.6 2018-07-11 CRAN (R 4.1.0)
ellipse 0.4.2 2020-05-27 CRAN (R 4.1.0)
Formula 1.2-4 2020-10-16 CRAN (R 4.1.0)
ggpattern 0.2.0 2021-10-11 Github ()
ggplot2 3.3.6 2022-05-03 CRAN (R 4.1.1)
ggthemes 4.2.4 2021-01-20 CRAN (R 4.1.0)
gridExtra 2.3 2017-09-09 CRAN (R 4.1.0)
gtsummary 1.4.2 2021-07-13 CRAN (R 4.1.0)
haven 2.4.3 2021-08-04 CRAN (R 4.1.0)
Hmisc 4.5-0 2021-02-28 CRAN (R 4.1.0)
jtools 2.1.4 2021-09-03 CRAN (R 4.1.0)
kableExtra 1.3.4 2021-02-20 CRAN (R 4.1.0)
knitr 1.39 2022-04-26 CRAN (R 4.1.2)
lattice 0.20-44 2021-05-02 CRAN (R 4.1.1)
lme4 1.1-29 2022-04-07 CRAN (R 4.1.2)
lubridate 1.7.10 2021-02-26 CRAN (R 4.1.0)
mada 0.5.10 2020-05-25 CRAN (R 4.1.0)
Matrix 1.3-4 2021-06-01 CRAN (R 4.1.1)
metafor 3.0-2 2021-06-09 CRAN (R 4.1.0)
mvmeta 1.0.3 2019-12-10 CRAN (R 4.1.0)
mvtnorm 1.1-2 2021-06-07 CRAN (R 4.1.0)
nlme 3.1-152 2021-02-04 CRAN (R 4.1.1)
pander 0.6.4 2021-06-13 CRAN (R 4.1.0)
papaja 0.1.0.9997 2021-10-11 Github ()
plotly 4.10.0 2021-10-09 CRAN (R 4.1.0)
plyr 1.8.6 2020-03-03 CRAN (R 4.1.0)
psych 2.1.9 2021-09-22 CRAN (R 4.1.0)
purrr 0.3.4 2020-04-17 CRAN (R 4.1.0)
RColorBrewer 1.1-3 2022-04-03 CRAN (R 4.1.2)
Rcpp 1.0.8.3 2022-03-17 CRAN (R 4.1.2)
remedy 0.1.0 2018-12-03 CRAN (R 4.1.0)
reshape 0.8.8 2018-10-23 CRAN (R 4.1.0)
reshape2 1.4.4 2020-04-09 CRAN (R 4.1.0)
rmarkdown 2.11 2021-09-14 CRAN (R 4.1.1)
sessioninfo 1.1.1 2018-11-05 CRAN (R 4.1.0)
stringi 1.7.6 2021-11-29 CRAN (R 4.1.0)
stringr 1.4.0 2019-02-10 CRAN (R 4.1.0)
survival 3.2-12 2021-08-13 CRAN (R 4.1.1)
tibble 3.1.7 2022-05-03 CRAN (R 4.1.1)
tidyr 1.2.0 2022-02-01 CRAN (R 4.1.2)
usethis 2.0.1 2021-02-10 CRAN (R 4.1.0)

Full Session Info (including loaded but unattached packages — for troubleshooting only)

R version 4.1.1 (2021-08-10)

Platform: x86_64-apple-darwin17.0 (64-bit)

locale: en_US.UTF-8||en_US.UTF-8||en_US.UTF-8||C||en_US.UTF-8||en_US.UTF-8

attached base packages:

  • grid
  • stats
  • graphics
  • grDevices
  • datasets
  • utils
  • methods
  • base

other attached packages:

  • reshape(v.0.8.8)
  • dygraphs(v.1.1.1.6)
  • metafor(v.3.0-2)
  • purrr(v.0.3.4)
  • lubridate(v.1.7.10)
  • reshape2(v.1.4.4)
  • stringi(v.1.7.6)
  • stringr(v.1.4.0)
  • papaja(v.0.1.0.9997)
  • kableExtra(v.1.3.4)
  • Hmisc(v.4.5-0)
  • Formula(v.1.2-4)
  • survival(v.3.2-12)
  • lattice(v.0.20-44)
  • tidyr(v.1.2.0)
  • dplyr(v.1.0.9)
  • plyr(v.1.8.6)
  • data.table(v.1.14.0)
  • mada(v.0.5.10)
  • mvmeta(v.1.0.3)
  • ellipse(v.0.4.2)
  • mvtnorm(v.1.1-2)
  • devtools(v.2.4.2)
  • usethis(v.2.0.1)
  • pander(v.0.6.4)
  • tibble(v.3.1.7)
  • sessioninfo(v.1.1.1)
  • gtsummary(v.1.4.2)
  • jtools(v.2.1.4)
  • nlme(v.3.1-152)
  • lme4(v.1.1-29)
  • Matrix(v.1.3-4)
  • ggpattern(v.0.2.0)
  • gridExtra(v.2.3)
  • plotly(v.4.10.0)
  • RColorBrewer(v.1.1-3)
  • haven(v.2.4.3)
  • ggthemes(v.4.2.4)
  • ggplot2(v.3.3.6)
  • psych(v.2.1.9)
  • brms(v.2.16.1)
  • Rcpp(v.1.0.8.3)
  • bookdown(v.0.24)
  • remedy(v.0.1.0)
  • knitr(v.1.39)
  • rmarkdown(v.2.11)

loaded via a namespace (and not attached):

  • estimability(v.1.3)
  • msm(v.1.6.9)
  • coda(v.0.19-4)
  • multcomp(v.1.4-18)
  • rpart(v.4.1-15)
  • inline(v.0.3.19)
  • generics(v.0.1.2)
  • cowplot(v.1.1.1)
  • callr(v.3.7.0)
  • TH.data(v.1.1-0)
  • proxy(v.0.4-26)
  • chron(v.2.3-56)
  • tzdb(v.0.1.2)
  • webshot(v.0.5.2)
  • xml2(v.1.3.2)
  • httpuv(v.1.6.3)
  • StanHeaders(v.2.21.0-7)
  • assertthat(v.0.2.1)
  • xfun(v.0.30)
  • hms(v.1.1.1)
  • jquerylib(v.0.1.4)
  • bayesplot(v.1.8.1)
  • evaluate(v.0.15)
  • promises(v.1.2.0.1)
  • fansi(v.1.0.3)
  • igraph(v.1.2.6)
  • DBI(v.1.1.1)
  • tmvnsim(v.1.0-2)
  • htmlwidgets(v.1.5.4)
  • horst(v.0.1)
  • tensorA(v.0.36.2)
  • stats4(v.4.1.1)
  • ellipsis(v.0.3.2)
  • crosstalk(v.1.1.1)
  • backports(v.1.4.1)
  • V8(v.3.4.2)
  • insight(v.0.17.0)
  • markdown(v.1.1)
  • RcppParallel(v.5.1.4)
  • vctrs(v.0.4.1)
  • remotes(v.2.4.0)
  • sjlabelled(v.1.1.8)
  • abind(v.1.4-5)
  • cachem(v.1.0.6)
  • withr(v.2.5.0)
  • checkmate(v.2.0.0)
  • emmeans(v.1.6.3)
  • xts(v.0.12.1)
  • prettyunits(v.1.1.1)
  • mnormt(v.2.0.2)
  • svglite(v.2.0.0)
  • cluster(v.2.1.2)
  • lazyeval(v.0.2.2)
  • crayon(v.1.5.1)
  • labeling(v.0.4.2)
  • pkgconfig(v.2.0.3)
  • pkgload(v.1.2.4)
  • nnet(v.7.3-16)
  • rlang(v.1.0.2)
  • lifecycle(v.1.0.1)
  • miniUI(v.0.1.1.1)
  • colourpicker(v.1.1.0)
  • sandwich(v.3.0-1)
  • polycor(v.0.7-10)
  • mathjaxr(v.1.4-0)
  • modelr(v.0.1.8)
  • distributional(v.0.2.2)
  • rprojroot(v.2.0.3)
  • matrixStats(v.0.60.1)
  • datawizard(v.0.4.0)
  • loo(v.2.4.1)
  • boot(v.1.3-28)
  • zoo(v.1.8-9)
  • base64enc(v.0.1-3)
  • gamm4(v.0.2-6)
  • ggridges(v.0.5.3)
  • processx(v.3.5.3)
  • png(v.0.1-7)
  • viridisLite(v.0.4.0)
  • parameters(v.0.17.0)
  • rootSolve(v.1.8.2.2)
  • readr(v.2.0.2)
  • jpeg(v.0.1-9)
  • shinystan(v.2.5.0)
  • ggeffects(v.1.1.1)
  • scales(v.1.2.0)
  • memoise(v.2.0.0)
  • magrittr(v.2.0.3)
  • threejs(v.0.3.3)
  • compiler(v.4.1.1)
  • rstantools(v.2.1.1)
  • snakecase(v.0.11.0)
  • cli(v.3.3.0)
  • ps(v.1.7.0)
  • Brobdingnag(v.1.2-6)
  • htmlTable(v.2.2.1)
  • MASS(v.7.3-54)
  • mgcv(v.1.8-36)
  • tidyselect(v.1.1.2)
  • forcats(v.0.5.1)
  • mixmeta(v.1.1.3)
  • projpred(v.2.0.2)
  • highr(v.0.9)
  • yaml(v.2.3.5)
  • latticeExtra(v.0.6-29)
  • bridgesampling(v.1.1-2)
  • sass(v.0.4.0)
  • tools(v.4.1.1)
  • lmom(v.2.8)
  • parallel(v.4.1.1)
  • rstudioapi(v.0.13)
  • foreign(v.0.8-81)
  • gld(v.2.6.2)
  • posterior(v.1.1.0)
  • farver(v.2.1.0)
  • sjPlot(v.2.8.9)
  • digest(v.0.6.29)
  • shiny(v.1.6.0)
  • broom(v.0.8.0.9000)
  • performance(v.0.9.0)
  • later(v.1.3.0)
  • httr(v.1.4.2)
  • rsconnect(v.0.8.24)
  • effectsize(v.0.6.0.1)
  • sjstats(v.0.18.1)
  • colorspace(v.2.0-3)
  • rvest(v.1.0.1)
  • brio(v.1.1.3)
  • fs(v.1.5.0)
  • splines(v.4.1.1)
  • Scale(v.1.0.4)
  • rematch2(v.2.1.2)
  • expm(v.0.999-6)
  • ltm(v.1.1-1)
  • Exact(v.3.0)
  • renv(v.0.14.0)
  • shinythemes(v.1.2.0)
  • systemfonts(v.1.0.2)
  • xtable(v.1.8-4)
  • jsonlite(v.1.8.0)
  • nloptr(v.1.2.2.2)
  • rstan(v.2.21.2)
  • testthat(v.3.1.4)
  • nFactors(v.2.4.1)
  • gt(v.0.3.1)
  • R6(v.2.5.1)
  • pillar(v.1.7.0)
  • htmltools(v.0.5.2)
  • mime(v.0.12)
  • glue(v.1.6.2)
  • fastmap(v.1.1.0)
  • minqa(v.1.2.4)
  • DT(v.0.19)
  • class(v.7.3-19)
  • codetools(v.0.2-18)
  • pkgbuild(v.1.2.0)
  • utf8(v.1.2.2)
  • bslib(v.0.3.0)
  • curl(v.4.3.2)
  • DescTools(v.0.99.43)
  • gtools(v.3.9.2)
  • shinyjs(v.2.0.0)
  • desc(v.1.4.1)
  • munsell(v.0.5.0)
  • e1071(v.1.7-9)
  • broom.helpers(v.1.4.0)
  • sjmisc(v.2.8.7)
  • gtable(v.0.3.0)
  • bayestestR(v.0.12.1)




References